# Pre-processing of the data

# Run preprocess_rna() (defined elsewhere) on rnaseq.RData with the batch and
# gender correction flags switched on. The flags are written as TRUE rather
# than T because T is an ordinary variable that can be reassigned.
ls_preprocessed <- preprocess_rna(
  path_rnaseq = "rnaseq.RData",
  correct_batch = TRUE,
  correct_gender = TRUE
)

# Exploring data

# Batch effect correction

# Print the four diagnostic objects stored in the preprocessing result.
# NOTE(review): the suffixes presumably mean "before" (_bf) and "after" (_af)
# correction, for batch and gender respectively -- confirm against
# preprocess_rna().
print(ls_preprocessed[["pbatch_bf"]])

print(ls_preprocessed[["pgender_bf"]])

print(ls_preprocessed[["pbatch_af"]])

print(ls_preprocessed[["pgender_af"]])

# Using all genes

# List the elements of the preprocessing result and the size of the
# variance-stabilised matrix (echoed output kept below each call).
names(ls_preprocessed)
## [1] "p_all"        "rna_all"      "pData_rnaseq" "counts_all"   "vsd_mat"     
## [6] "pbatch_bf"    "pgender_bf"   "pbatch_af"    "pgender_af"
dim(ls_preprocessed[["vsd_mat"]])
## [1] 37984    82

# Spearman correlation between the columns of the full matrix; rcorr()
# correlates columns, so corr_pt$r is 82 x 82 for the matrix above.
corr_pt <- Hmisc::rcorr(ls_preprocessed[["vsd_mat"]], type = "spearman")

# Correlation heatmap with rows and columns each split into 3 k-means slices.
# NOTE(review): k-means starts from random centres, so the slices may differ
# between runs unless a seed is set elsewhere -- confirm.
Heatmap(
  corr_pt$r,
  name = "mat",
  row_km = 3,
  column_km = 3,
  row_names_gp = gpar(fontsize = 8),
  column_names_gp = gpar(fontsize = 8),
  heatmap_legend_param = list(color_bar = "continuous")
)

# Hierarchical clustering
# Distances are taken between rows of the correlation matrix. The call is
# kept as hclust(d) because plot.hclust() labels the x axis from that call.
d <- dist(corr_pt$r)
hc1 <- hclust(d)
plot(hc1, hang = -1, cex = 0.6)

# Using top n genes

# Keep the n_genes rows (genes) with the highest variance across columns.
n_genes <- 10000
vsd_mat <- ls_preprocessed[["vsd_mat"]]
variances <- apply(vsd_mat, MARGIN = 1, FUN = var)

# Rank genes by decreasing variance and keep id + symbol of the top n_genes.
# Symbols are attached by position, so this relies on rna_all having the same
# row order as vsd_mat -- TODO confirm.
top_genes <- vsd_mat %>%
  data.frame() %>%
  mutate(
    gene = rownames(.),
    symbol = ls_preprocessed$rna_all$Feature_gene_name,
    variances = variances
  ) %>%
  arrange(desc(variances)) %>%
  dplyr::select(gene, symbol) %>%
  head(n = n_genes)

# Despite its name, vsd_mat5k holds the top n_genes rows; rows are relabelled
# with gene symbols.
vsd_mat5k <- vsd_mat[top_genes[["gene"]], ]
rownames(vsd_mat5k) <- top_genes[["symbol"]]

# Column-by-column Spearman correlation on the reduced matrix.
corr_pt <- Hmisc::rcorr(vsd_mat5k, type = "spearman")

# Heatmap split into 2 k-means slices per dimension.
# NOTE(review): k-means is randomly initialised; slices may vary between runs
# unless a seed is set elsewhere -- confirm.
Heatmap(
  corr_pt$r,
  name = "mat",
  row_km = 2,
  column_km = 2,
  row_names_gp = gpar(fontsize = 8),
  column_names_gp = gpar(fontsize = 8),
  heatmap_legend_param = list(color_bar = "continuous")
)

# Hierarchical clustering
# hclust(d) is kept verbatim: plot.hclust() takes its x-axis label from it.
d <- dist(corr_pt$r)
hc1 <- hclust(d)
plot(hc1, hang = -1, cex = 0.6)

# Keep the n_genes rows (genes) with the highest variance across columns.
n_genes <- 5000
vsd_mat <- ls_preprocessed[["vsd_mat"]]
variances <- apply(vsd_mat, MARGIN = 1, FUN = var)

# Rank genes by decreasing variance and keep id + symbol of the top n_genes.
# Symbols are attached by position, so this relies on rna_all having the same
# row order as vsd_mat -- TODO confirm.
top_genes <- vsd_mat %>%
  data.frame() %>%
  mutate(
    gene = rownames(.),
    symbol = ls_preprocessed$rna_all$Feature_gene_name,
    variances = variances
  ) %>%
  arrange(desc(variances)) %>%
  dplyr::select(gene, symbol) %>%
  head(n = n_genes)

# Reduced matrix of the top n_genes rows, relabelled with gene symbols.
vsd_mat5k <- vsd_mat[top_genes[["gene"]], ]
rownames(vsd_mat5k) <- top_genes[["symbol"]]

# Column-by-column Spearman correlation on the reduced matrix.
corr_pt <- Hmisc::rcorr(vsd_mat5k, type = "spearman")

# Heatmap split into 2 k-means slices per dimension.
# NOTE(review): k-means is randomly initialised; slices may vary between runs
# unless a seed is set elsewhere -- confirm.
Heatmap(
  corr_pt$r,
  name = "mat",
  row_km = 2,
  column_km = 2,
  row_names_gp = gpar(fontsize = 8),
  column_names_gp = gpar(fontsize = 8),
  heatmap_legend_param = list(color_bar = "continuous")
)

# Hierarchical clustering
# hclust(d) is kept verbatim: plot.hclust() takes its x-axis label from it.
d <- dist(corr_pt$r)
hc1 <- hclust(d)
plot(hc1, hang = -1, cex = 0.6)

# Keep the n_genes rows (genes) with the highest variance across columns.
n_genes <- 1000
vsd_mat <- ls_preprocessed[["vsd_mat"]]
variances <- apply(vsd_mat, MARGIN = 1, FUN = var)

# Rank genes by decreasing variance and keep id + symbol of the top n_genes.
# Symbols are attached by position, so this relies on rna_all having the same
# row order as vsd_mat -- TODO confirm.
top_genes <- vsd_mat %>%
  data.frame() %>%
  mutate(
    gene = rownames(.),
    symbol = ls_preprocessed$rna_all$Feature_gene_name,
    variances = variances
  ) %>%
  arrange(desc(variances)) %>%
  dplyr::select(gene, symbol) %>%
  head(n = n_genes)

# Despite its name, vsd_mat5k holds the top n_genes rows; rows are relabelled
# with gene symbols.
vsd_mat5k <- vsd_mat[top_genes[["gene"]], ]
rownames(vsd_mat5k) <- top_genes[["symbol"]]

# Column-by-column Spearman correlation on the reduced matrix.
corr_pt <- Hmisc::rcorr(vsd_mat5k, type = "spearman")

# Heatmap split into 2 k-means slices per dimension.
# NOTE(review): k-means is randomly initialised; slices may vary between runs
# unless a seed is set elsewhere -- confirm.
Heatmap(
  corr_pt$r,
  name = "mat",
  row_km = 2,
  column_km = 2,
  row_names_gp = gpar(fontsize = 8),
  column_names_gp = gpar(fontsize = 8),
  heatmap_legend_param = list(color_bar = "continuous")
)

# Hierarchical clustering
# hclust(d) is kept verbatim: plot.hclust() takes its x-axis label from it.
d <- dist(corr_pt$r)
hc1 <- hclust(d)
plot(hc1, hang = -1, cex = 0.6)

# Keep the n_genes rows (genes) with the highest variance across columns.
n_genes <- 500
vsd_mat <- ls_preprocessed[["vsd_mat"]]
variances <- apply(vsd_mat, MARGIN = 1, FUN = var)

# Rank genes by decreasing variance and keep id + symbol of the top n_genes.
# Symbols are attached by position, so this relies on rna_all having the same
# row order as vsd_mat -- TODO confirm.
top_genes <- vsd_mat %>%
  data.frame() %>%
  mutate(
    gene = rownames(.),
    symbol = ls_preprocessed$rna_all$Feature_gene_name,
    variances = variances
  ) %>%
  arrange(desc(variances)) %>%
  dplyr::select(gene, symbol) %>%
  head(n = n_genes)

# Despite its name, vsd_mat5k holds the top n_genes rows; rows are relabelled
# with gene symbols.
vsd_mat5k <- vsd_mat[top_genes[["gene"]], ]
rownames(vsd_mat5k) <- top_genes[["symbol"]]

# Column-by-column Spearman correlation on the reduced matrix.
corr_pt <- Hmisc::rcorr(vsd_mat5k, type = "spearman")

# Heatmap split into 2 k-means slices per dimension.
# NOTE(review): k-means is randomly initialised; slices may vary between runs
# unless a seed is set elsewhere -- confirm.
Heatmap(
  corr_pt$r,
  name = "mat",
  row_km = 2,
  column_km = 2,
  row_names_gp = gpar(fontsize = 8),
  column_names_gp = gpar(fontsize = 8),
  heatmap_legend_param = list(color_bar = "continuous")
)

# Hierarchical clustering
# hclust(d) is kept verbatim: plot.hclust() takes its x-axis label from it.
d <- dist(corr_pt$r)
hc1 <- hclust(d)
plot(hc1, hang = -1, cex = 0.6)

# Keep the n_genes rows (genes) with the highest variance across columns.
n_genes <- 100
vsd_mat <- ls_preprocessed[["vsd_mat"]]
variances <- apply(vsd_mat, MARGIN = 1, FUN = var)

# Rank genes by decreasing variance and keep id + symbol of the top n_genes.
# Symbols are attached by position, so this relies on rna_all having the same
# row order as vsd_mat -- TODO confirm.
top_genes <- vsd_mat %>%
  data.frame() %>%
  mutate(
    gene = rownames(.),
    symbol = ls_preprocessed$rna_all$Feature_gene_name,
    variances = variances
  ) %>%
  arrange(desc(variances)) %>%
  dplyr::select(gene, symbol) %>%
  head(n = n_genes)

# Despite its name, vsd_mat5k holds the top n_genes rows; rows are relabelled
# with gene symbols.
vsd_mat5k <- vsd_mat[top_genes[["gene"]], ]
rownames(vsd_mat5k) <- top_genes[["symbol"]]

# Column-by-column Spearman correlation on the reduced matrix.
corr_pt <- Hmisc::rcorr(vsd_mat5k, type = "spearman")

# Heatmap split into 2 k-means slices per dimension.
# NOTE(review): k-means is randomly initialised; slices may vary between runs
# unless a seed is set elsewhere -- confirm.
Heatmap(
  corr_pt$r,
  name = "mat",
  row_km = 2,
  column_km = 2,
  row_names_gp = gpar(fontsize = 8),
  column_names_gp = gpar(fontsize = 8),
  heatmap_legend_param = list(color_bar = "continuous")
)

# Transposing puts the selected genes in the columns, so this second
# correlation is gene-by-gene rather than column-by-column.
corr_genes <- Hmisc::rcorr(t(vsd_mat5k), type = "spearman")
Heatmap(
  corr_genes$r,
  name = "mat",
  row_km = 2,
  column_km = 2,
  row_names_gp = gpar(fontsize = 8),
  column_names_gp = gpar(fontsize = 8),
  heatmap_legend_param = list(color_bar = "continuous")
)

# Hierarchical clustering
# Uses corr_pt (not corr_genes). hclust(d) is kept verbatim: plot.hclust()
# takes its x-axis label from that call.
d <- dist(corr_pt$r)
hc1 <- hclust(d)
plot(hc1, hang = -1, cex = 0.6)

# Clustering

# Keep the n_genes rows (genes) with the highest variance across columns.
n_genes <- 10000
vsd_mat <- ls_preprocessed[["vsd_mat"]]
variances <- apply(vsd_mat, MARGIN = 1, FUN = var)

# Rank genes by decreasing variance and keep id + symbol of the top n_genes.
# Symbols are attached by position, so this relies on rna_all having the same
# row order as vsd_mat -- TODO confirm.
top_genes <- vsd_mat %>%
  data.frame() %>%
  mutate(
    gene = rownames(.),
    symbol = ls_preprocessed$rna_all$Feature_gene_name,
    variances = variances
  ) %>%
  arrange(desc(variances)) %>%
  dplyr::select(gene, symbol) %>%
  head(n = n_genes)

# Despite its name, vsd_mat5k holds the top n_genes rows; rows are relabelled
# with gene symbols.
vsd_mat5k <- vsd_mat[top_genes[["gene"]], ]
rownames(vsd_mat5k) <- top_genes[["symbol"]]

# Column-by-column Spearman correlation on the reduced matrix.
# Author's note: all p-values returned here were observed to be 0.
corr_pt <- Hmisc::rcorr(vsd_mat5k, type = "spearman")

# Hierarchical clustering
# hclust(d) is kept verbatim: plot.hclust() takes its x-axis label from it.
d <- dist(corr_pt$r)
hc1 <- hclust(d)
plot(hc1, hang = -1, cex = 0.6)